/*
 *
 *  * Copyright (C) 2015 Square, Inc.
 *  *
 *  * Licensed under the Apache License, Version 2.0 (the "License");
 *  * you may not use this file except in compliance with the License.
 *  * You may obtain a copy of the License at
 *  *
 *  *      http://www.apache.org/licenses/LICENSE-2.0
 *  *
 *  * Unless required by applicable law or agreed to in writing, software
 *  * distributed under the License is distributed on an "AS IS" BASIS,
 *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  * See the License for the specific language governing permissions and
 *  * limitations under the License.
 *
 */
package com.lark.oapi.okio;

/**
 * Okio assumes most applications use UTF-8 exclusively, and offers optimized implementations of
 * common operations on UTF-8 strings.
 *
 * <p><table border="1" cellspacing="0" cellpadding="3" summary="">
 * <tr>
 * <th></th>
 * <th>{@link ByteString}</th>
 * <th>{@link Buffer}, {@link BufferedSink}, {@link BufferedSource}</th>
 * </tr>
 * <tr>
 * <td>Encode a string</td>
 * <td>{@link ByteString#encodeUtf8(String)}</td>
 * <td>{@link BufferedSink#writeUtf8(String)}</td>
 * </tr>
 * <tr>
 * <td>Encode a code point</td>
 * <td></td>
 * <td>{@link BufferedSink#writeUtf8CodePoint(int)}</td>
 * </tr>
 * <tr>
 * <td>Decode a string</td>
 * <td>{@link ByteString#utf8()}</td>
 * <td>{@link BufferedSource#readUtf8()}, {@link BufferedSource#readUtf8(long)}</td>
 * </tr>
 * <tr>
 * <td>Decode a code point</td>
 * <td></td>
 * <td>{@link BufferedSource#readUtf8CodePoint()}</td>
 * </tr>
 * <tr>
 * <td>Decode until the next {@code \r\n} or {@code \n}</td>
 * <td></td>
 * <td>{@link BufferedSource#readUtf8LineStrict()},
 * {@link BufferedSource#readUtf8LineStrict(long)}</td>
 * </tr>
 * <tr>
 * <td>Decode until the next {@code \r\n}, {@code \n}, or {@code EOF}</td>
 * <td></td>
 * <td>{@link BufferedSource#readUtf8Line()}</td>
 * </tr>
 * <tr>
 * <td>Measure the bytes in a UTF-8 string</td>
 * <td colspan="2">{@link Utf8#size}, {@link Utf8#size(String, int, int)}</td>
 * </tr>
 * </table>
 */
public final class Utf8 {

    /** Not instantiable: this class only hosts static helpers. */
    private Utf8() {
    }

    /**
     * Returns the number of bytes used to encode {@code string} as UTF-8 when using {@link
     * ByteString#encodeUtf8} or {@link Buffer#writeUtf8(String)}.
     *
     * @param string the string to measure; must not be null
     * @return the encoded length in bytes
     * @throws IllegalArgumentException if {@code string} is null
     */
    public static long size(String string) {
        // Validate here so a null argument is reported the same way as in the ranged overload,
        // instead of surfacing as a message-less NullPointerException from string.length().
        if (string == null) {
            throw new IllegalArgumentException("string == null");
        }
        return size(string, 0, string.length());
    }

    /**
     * Returns the number of bytes used to encode the slice of {@code string} as UTF-8 when using
     * {@link BufferedSink#writeUtf8(String, int, int)}.
     *
     * <p>The slice is measured in UTF-16 {@code char} units. A surrogate that is not part of a
     * well-formed high/low pair lying entirely inside the slice is counted as a single byte
     * (the {@code '?'} replacement).
     *
     * @param string the string to measure; must not be null
     * @param beginIndex index of the first {@code char} to measure, inclusive
     * @param endIndex index of the last {@code char} to measure, exclusive
     * @return the encoded length in bytes of {@code string[beginIndex, endIndex)}
     * @throws IllegalArgumentException if {@code string} is null, {@code beginIndex} is negative,
     *     {@code endIndex} is less than {@code beginIndex}, or {@code endIndex} exceeds the
     *     string's length
     */
    public static long size(String string, int beginIndex, int endIndex) {
        if (string == null) {
            throw new IllegalArgumentException("string == null");
        }
        if (beginIndex < 0) {
            throw new IllegalArgumentException("beginIndex < 0: " + beginIndex);
        }
        if (endIndex < beginIndex) {
            throw new IllegalArgumentException("endIndex < beginIndex: " + endIndex + " < " + beginIndex);
        }
        if (endIndex > string.length()) {
            throw new IllegalArgumentException(
                    "endIndex > string.length: " + endIndex + " > " + string.length());
        }

        long result = 0;
        // The index is advanced inside the body because a surrogate pair consumes two chars.
        for (int i = beginIndex; i < endIndex; ) {
            int c = string.charAt(i);

            if (c < 0x80) {
                // A 7-bit character with 1 byte.
                result++;
                i++;

            } else if (c < 0x800) {
                // An 11-bit character with 2 bytes.
                result += 2;
                i++;

            } else if (c < 0xd800 || c > 0xdfff) {
                // A 16-bit character (outside the surrogate range) with 3 bytes.
                result += 3;
                i++;

            } else {
                // c is a surrogate. Peek at the next char, but never past endIndex: a high
                // surrogate that ends the slice has no partner, so 0 stands in as "not a low
                // surrogate".
                int low = i + 1 < endIndex ? string.charAt(i + 1) : 0;
                if (c > 0xdbff || low < 0xdc00 || low > 0xdfff) {
                    // A malformed surrogate (lone low, or high without a following low),
                    // which yields '?'. Only this one char is consumed.
                    result++;
                    i++;

                } else {
                    // A 21-bit character with 4 bytes; both halves of the pair are consumed.
                    result += 4;
                    i += 2;
                }
            }
        }

        return result;
    }
}
